# mako/filters.py
# Copyright 2006-2020 the Mako authors and contributors <see AUTHORS file>
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php


import codecs
import re

from mako import compat
from mako.compat import codepoint2name
from mako.compat import name2codepoint
from mako.compat import quote_plus
from mako.compat import unquote_plus

xml_escapes = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    '"': "&#34;",  # also &quot; in html-only
    "'": "&#39;",  # also &apos; in html-only
}

# XXX: &quot; is valid in HTML and XML
#      &apos; is not valid HTML, but is valid XML


def legacy_html_escape(s):
    """legacy HTML escape for non-unicode mode."""
    s = s.replace("&", "&amp;")
    s = s.replace(">", "&gt;")
    s = s.replace("<", "&lt;")
    s = s.replace('"', "&#34;")
    s = s.replace("'", "&#39;")
    return s


try:
    import markupsafe

    html_escape = markupsafe.escape
except ImportError:
    html_escape = legacy_html_escape


def xml_escape(string):
    return re.sub(r'([&<"\'>])', lambda m: xml_escapes[m.group()], string)


def url_escape(string):
    # convert into a list of octets
    string = string.encode("utf8")
    return quote_plus(string)


def legacy_url_escape(string):
    # convert into a list of octets
    return quote_plus(string)


def url_unescape(string):
    text = unquote_plus(string)
    if not is_ascii_str(text):
        text = text.decode("utf8")
    return text


def trim(string):
    return string.strip()


class Decode(object):
    def __getattr__(self, key):
        def decode(x):
            if isinstance(x, compat.text_type):
                return x
            elif not isinstance(x, compat.binary_type):
                return decode(str(x))
            else:
                return compat.text_type(x, encoding=key)

        return decode


decode = Decode()


_ASCII_re = re.compile(r"\A[\x00-\x7f]*\Z")


def is_ascii_str(text):
    return isinstance(text, str) and _ASCII_re.match(text)


################################################################


class XMLEntityEscaper(object):
    def __init__(self, codepoint2name, name2codepoint):
        self.codepoint2entity = dict(
            [
                (c, compat.text_type("&%s;" % n))
                for c, n in codepoint2name.items()
            ]
        )
        self.name2codepoint = name2codepoint

    def escape_entities(self, text):
        """Replace characters with their character entity references.

        Only characters corresponding to a named entity are replaced.
        """
        return compat.text_type(text).translate(self.codepoint2entity)

    def __escape(self, m):
        codepoint = ord(m.group())
        try:
            return self.codepoint2entity[codepoint]
        except (KeyError, IndexError):
            return "&#x%X;" % codepoint

    __escapable = re.compile(r'["&<>]|[^\x00-\x7f]')

    def escape(self, text):
        """Replace characters with their character references.

        Replace characters by their named entity references.
        Non-ASCII characters, if they do not have a named entity reference,
        are replaced by numerical character references.

        The return value is guaranteed to be ASCII.
        """
        return self.__escapable.sub(
            self.__escape, compat.text_type(text)
        ).encode("ascii")

    # XXX: This regexp will not match all valid XML entity names__.
    # (It punts on details involving involving CombiningChars and Extenders.)
    #
    # .. __: http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityRef
    __characterrefs = re.compile(
        r"""& (?:
                                          \#(\d+)
                                          | \#x([\da-f]+)
                                          | ( (?!\d) [:\w] [-.:\w]+ )
                                          ) ;""",
        re.X | re.UNICODE,
    )

    def __unescape(self, m):
        dval, hval, name = m.groups()
        if dval:
            codepoint = int(dval)
        elif hval:
            codepoint = int(hval, 16)
        else:
            codepoint = self.name2codepoint.get(name, 0xFFFD)
            # U+FFFD = "REPLACEMENT CHARACTER"
        if codepoint < 128:
            return chr(codepoint)
        return chr(codepoint)

    def unescape(self, text):
        """Unescape character references.

        All character references (both entity references and numerical
        character references) are unescaped.
        """
        return self.__characterrefs.sub(self.__unescape, text)


_html_entities_escaper = XMLEntityEscaper(codepoint2name, name2codepoint)

html_entities_escape = _html_entities_escaper.escape_entities
html_entities_unescape = _html_entities_escaper.unescape


def htmlentityreplace_errors(ex):
    """An encoding error handler.

    This python codecs error handler replaces unencodable
    characters with HTML entities, or, if no HTML entity exists for
    the character, XML character references::

        >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
        'The cost was &euro;12.'
    """
    if isinstance(ex, UnicodeEncodeError):
        # Handle encoding errors
        bad_text = ex.object[ex.start : ex.end]
        text = _html_entities_escaper.escape(bad_text)
        return (compat.text_type(text), ex.end)
    raise ex


codecs.register_error("htmlentityreplace", htmlentityreplace_errors)


# TODO: options to make this dynamic per-compilation will be added in a later
# release
DEFAULT_ESCAPES = {
    "x": "filters.xml_escape",
    "h": "filters.html_escape",
    "u": "filters.url_escape",
    "trim": "filters.trim",
    "entity": "filters.html_entities_escape",
    "unicode": "unicode",
    "decode": "decode",
    "str": "str",
    "n": "n",
}

if compat.py3k:
    DEFAULT_ESCAPES.update({"unicode": "str"})

NON_UNICODE_ESCAPES = DEFAULT_ESCAPES.copy()
NON_UNICODE_ESCAPES["h"] = "filters.legacy_html_escape"
NON_UNICODE_ESCAPES["u"] = "filters.legacy_url_escape"
